In [1]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import loompy
import seaborn as sb

import cellrank as cr
from cellrank.tl.estimators import GPCCA
from cellrank.tl.kernels import VelocityKernel

import scvelo as scv
scv.__version__

%matplotlib inline
In [2]:
wgenes=pd.read_csv('/Users/menghan/Documents/projects/genome_ref/ensembl97_wgenes.txt',sep='\t')

nasal

In [3]:
adata=sc.read_loom("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_nasal.loom")
ldata = scv.read("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/velocity_9samples.loom", cache=True)

#remove W chr genes
wolp=adata.var_names.intersection(wgenes['ensembl_gene_id'])
print(wolp)
w_gene_indicator = np.in1d(adata.var_names, wgenes['ensembl_gene_id'])
adata = adata[:, ~w_gene_indicator]

adata_loom = ldata[:, adata.var_names]
adata = scv.utils.merge(adata, adata_loom); del ldata

#rename seurat_clusters
sk_module=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_nasal_skModule.txt',sep='\t')
adata.obs['seurat_clusters']=sk_module['seurat_clusters'].values

adata.obs["seurat_clusters"] = adata.obs["seurat_clusters"].astype('category')
scv.pl.proportions(adata, groupby="seurat_clusters")
scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters')

print(adata.obs['seurat_clusters'].cat.categories)
recolor=["#65000B","#FC89AC","#FFC0CB","#DC0078","#B0445C","#FCAACF"]
adata.uns['seurat_clusters_colors']=recolor

scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters')  
Index(['ENSGALG00000040780', 'ENSGALG00000035998', 'ENSGALG00000048286',
       'ENSGALG00000035785', 'ENSGALG00000040263', 'ENSGALG00000029545',
       'ENSGALG00000033705', 'ENSGALG00000043758', 'ENSGALG00000047434',
       'ENSGALG00000048542', 'ENSGALG00000048307', 'ENSGALG00000041221',
       'ENSGALG00000047904', 'ENSGALG00000030382', 'ENSGALG00000031327',
       'ENSGALG00000034488'],
      dtype='object')
Renamed 'xtsne_cell_embeddings' to convention 'X_xtsne_cell_embeddings' (adata.obsm).
Index(['1', '2', '3', '4', '5', '6'], dtype='object')
In [4]:
## run scVelo
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
sc.tl.pca(adata)
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)

scv.tl.recover_dynamics(adata)

scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)
Filtered out 2556 genes that are detected 20 counts (shared).
WARNING: Did not normalize X as it looks processed already. To enforce normalization, set `enforce=True`.
Normalized count data: spliced, unspliced.
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
WARNING: Did not modify X as it looks preprocessed already.
computing moments based on connectivities
    finished (0:00:00) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
recovering dynamics
    finished (0:00:51) --> added 
    'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
    finished (0:00:00) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph
    finished (0:00:02) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)
In [14]:
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='seurat_clusters',legend_fontsize=8, smooth=.8, min_mass=3, 
                                 title='', size=20, figsize=(4,4), linewidth=0.4, 
                                 dpi=120,save="figure1_i")

#error: https://github.com/theislab/scvelo/issues/279
#plt.savefig('/Users/menghan/Documents/projects/skeletonConvergence/manuscript/figures/figure1_formal/figure1_I-0.pdf',dpi=300)
computing velocity embedding
    finished (0:00:00) --> added
    'velocity_xtsne_cell_embeddings', embedded velocity vectors (adata.obsm)
saving figure to file ./figures/scvelo_figure1_i.pdf
In [5]:
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap

#https://matplotlib.org/stable/tutorials/colors/colormaps.html
#https://matplotlib.org/3.1.0/tutorials/colors/colormap-manipulation.html

def plot_examples(cms):
    """
    helper function to plot two colormaps
    """
    np.random.seed(19680801)
    data = np.random.randn(30, 30)

    fig, axs = plt.subplots(1, 2, figsize=(6, 3), constrained_layout=True)
    for [ax, cmap] in zip(axs, cms):
        psm = ax.pcolormesh(data, cmap=cmap, rasterized=True, vmin=-3, vmax=3)
        fig.colorbar(psm, ax=ax)
    plt.show()

top = cm.get_cmap('gray', 256)
bottom = cm.get_cmap('YlOrRd', 256)
gray1 = np.array([211/256, 211/256, 211/256, 1])
gray2 = np.array([189/256, 189/256, 189/256, 1])
gray3 = np.array([126/256, 126/256, 126/256, 1])
newcolors = np.vstack((top(np.linspace(0.5, 1, 84)),
                       bottom(np.linspace(0, 1, 172))))
#from colormap import hex2rgb
#print(hex2rgb('#d3d3d3'));print(hex2rgb('#bdbdbd'));print(hex2rgb('#7e7e7e'))
#newcolors[:28, :] = gray3; newcolors[:42, :] = gray2; newcolors[42:84, :] = gray1

newcmp = ListedColormap(newcolors, name='GrayRed')
plot_examples([newcmp])
In [6]:
#check if SOX9 in the variable genes
print('ENSGALG00000004386' in adata.var_names)

sox9=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_nasal_sox9Exp.txt',sep='\t')
adata.obs['SOX9_RNA']=sox9['x'].values

#if not in adata.var_names, then cannot plot
#use SOX9 expression as color
#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='SOX9_RNA', color_map=newcmp,
                                 legend_fontsize=8, smooth=.8, min_mass=3, 
                                 title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_i_sox9")
False
computing velocity embedding
    finished (0:00:00) --> added
    'velocity_xtsne_cell_embeddings', embedded velocity vectors (adata.obsm)
saving figure to file ./figures/scvelo_figure1_i_sox9.pdf
In [7]:
adata.obs['skeletal_Module']=sk_module['skeletal_Features1'].values

#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='skeletal_Module', color_map=newcmp,
                                 legend_fontsize=8, smooth=.8, min_mass=3, 
                                 title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_i_skModule")
saving figure to file ./figures/scvelo_figure1_i_skModule.pdf
In [8]:
#https://cellrank.readthedocs.io/en/latest/kernels_and_estimators.html#Infer-terminal-states

#Combining two kernels
#RNA velocity vectors can sometimes be very noisy - to regularize them, let’s combine the VelocityKernel with a ConnectivityKernel, computed on the basis of transcriptomic similarity
vk = VelocityKernel(adata)
vk.compute_transition_matrix()
print(vk)
from cellrank.tl.kernels import ConnectivityKernel
ck = ConnectivityKernel(adata).compute_transition_matrix()
combined_kernel = 0.8 * vk + 0.2 * ck
print(combined_kernel)

<Velo[softmax_scale=8.19, mode=deterministic, seed=7683]>
((0.8 * <Velo[softmax_scale=8.19, mode=deterministic, seed=7683]>) + (0.2 * <Conn[dnorm=True]>))
In [9]:
g = GPCCA(combined_kernel)
print(g)
g.compute_schur(n_components=20)
#Above, we plotted the top 20 eigenvalues of the matrix T to see whether there is an apparent eigengap. 
g.plot_spectrum()
WARNING: Computing transition matrix using the default parameters
GPCCA[n=4483, kernel=((0.8 * <Velo[softmax_scale=8.19, mode=deterministic, seed=7683]>) + (0.2 * <Conn[dnorm=True]>))]
In [10]:
g.plot_schur(use=4, basis='xtsne_cell_embeddings')
In [11]:
#Infer terminal states
#The next step in GPCCA is to find a linear combination of these vectors such that the Markov chain defined on the subset of states has large self-transition probability
g.compute_macrostates(n_states=4, cluster_key='seurat_clusters')
g.plot_macrostates(basis='xtsne_cell_embeddings')
g.plot_macrostates(same_plot=False,basis='xtsne_cell_embeddings')
g.plot_macrostates(discrete=True,basis='xtsne_cell_embeddings')
INFO: Using pre-computed schur decomposition
In [12]:
# manually restrict the macrostates by passing a list of macrostate-names that you know are terminal in your data.
g.set_terminal_states_from_macrostates(['1','3','5'])
cr.pl.terminal_states(adata,basis="xtsne_cell_embeddings", dpi=150)
In [13]:
#Estimate fate probabilities
g.compute_absorption_probabilities()
g.absorption_probabilities
g.plot_absorption_probabilities(basis="xtsne_cell_embeddings")
g.plot_absorption_probabilities(same_plot=False,basis="xtsne_cell_embeddings", lineages=['3'], show_dp=False)
g.absorption_probabilities
Out[13]:
135
0.9185400.0300380.051422
0.9062310.0449910.048778
0.8890010.0455990.065400
0.7971300.0520620.150808
0.9321760.0299780.037845
0.2648960.6797990.055305
0.8618660.0498210.088313
0.8450160.0631860.091798
0.8690670.0673370.063596
0.8700000.0466800.083320
.........
0.8927390.0384340.068827
0.6158830.2524150.131702
0.9287650.0270340.044202
0.9272340.0288010.043965
0.9142790.0324140.053308
0.8644600.0640210.071520
0.7511800.0591000.189721
0.9226360.0321020.045262
0.7867410.0951230.118136
0.7751850.1104400.114376

4483 cells x 3 lineages

In [ ]:
#save
adata.obs.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_nasal_cellrank_obs.txt')
adata.var.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_nasal_cellrank_var.txt')
adata.write('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_nasal_cellrank.hdf5')
In [ ]:
 

somite

In [14]:
adata=sc.read_loom("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_somite.loom")
ldata = scv.read("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/velocity_9samples.loom", cache=True)

#remove W chr genes
wolp=adata.var_names.intersection(wgenes['ensembl_gene_id'])
print(wolp)
w_gene_indicator = np.in1d(adata.var_names, wgenes['ensembl_gene_id'])
adata = adata[:, ~w_gene_indicator]

adata_loom = ldata[:, adata.var_names]
adata = scv.utils.merge(adata, adata_loom); del ldata

adata.obs["seurat_clusters"] = adata.obs["seurat_clusters"].astype('category')
scv.pl.proportions(adata, groupby="seurat_clusters")
scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters')

print(adata.obs['seurat_clusters'].cat.categories)
recolor=["#8BD6FF","#008FD4","#00416A","#00CCFF"]
adata.uns['seurat_clusters_colors']=recolor

scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters') 
Index(['ENSGALG00000040780', 'ENSGALG00000043758', 'ENSGALG00000035998',
       'ENSGALG00000048286', 'ENSGALG00000040263', 'ENSGALG00000033705',
       'ENSGALG00000034488', 'ENSGALG00000040704', 'ENSGALG00000041221',
       'ENSGALG00000035785', 'ENSGALG00000040086'],
      dtype='object')
Renamed 'xtsne_cell_embeddings' to convention 'X_xtsne_cell_embeddings' (adata.obsm).
Index(['1', '2', '3', '4'], dtype='object')
In [15]:
## run scVelo
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
sc.tl.pca(adata)
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)

scv.tl.recover_dynamics(adata)

scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)
Filtered out 3147 genes that are detected 20 counts (shared).
WARNING: Did not normalize X as it looks processed already. To enforce normalization, set `enforce=True`.
Normalized count data: spliced, unspliced.
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
WARNING: Did not modify X as it looks preprocessed already.
computing moments based on connectivities
    finished (0:00:00) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
recovering dynamics
    finished (0:00:21) --> added 
    'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
    finished (0:00:00) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph
    finished (0:00:00) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)
In [16]:
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='seurat_clusters',legend_fontsize=8, smooth=.8, min_mass=3, 
                                 title='', size=20, figsize=(4,4), linewidth=0.4, 
                                 dpi=120,save="figure1_j")

#error: https://github.com/theislab/scvelo/issues/279
#plt.savefig('/Users/menghan/Documents/projects/skeletonConvergence/manuscript/figures/figure1_formal/figure1_J-0.pdf',dpi=300)
computing velocity embedding
    finished (0:00:00) --> added
    'velocity_xtsne_cell_embeddings', embedded velocity vectors (adata.obsm)
saving figure to file ./figures/scvelo_figure1_j.pdf
In [17]:
#check if SOX9 in the variable genes
print('ENSGALG00000004386' in adata.var_names)

sox9=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_somite_sox9Exp.txt',sep='\t')
adata.obs['SOX9_RNA']=sox9['x'].values

#if not in adata.var_names, then cannot plot
#use SOX9 expression as color
#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='SOX9_RNA', color_map=newcmp,
                                 legend_fontsize=8, smooth=.8, min_mass=3, 
                                 title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_j_sox9")
False
saving figure to file ./figures/scvelo_figure1_j_sox9.pdf
In [18]:
sk_module=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_somite_skModule.txt',sep='\t')
adata.obs['skeletal_Module']=sk_module['skeletal_Features1'].values

#if not in adata.var_names, then cannot plot
#use SOX9 expression as color
#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='skeletal_Module', color_map=newcmp,
                                 legend_fontsize=8, smooth=.8, min_mass=3, 
                                 title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_j_skModule")
saving figure to file ./figures/scvelo_figure1_j_skModule.pdf
In [19]:
#Combining two kernels
#RNA velocity vectors can sometimes be very noisy - to regularize them, let’s combine the VelocityKernel with a ConnectivityKernel, computed on the basis of transcriptomic similarity
vk = VelocityKernel(adata)
vk.compute_transition_matrix()
print(vk)
from cellrank.tl.kernels import ConnectivityKernel
ck = ConnectivityKernel(adata).compute_transition_matrix()
combined_kernel = 0.8 * vk + 0.2 * ck
print(combined_kernel)

<Velo[softmax_scale=8.5, mode=deterministic, seed=27844]>
((0.8 * <Velo[softmax_scale=8.5, mode=deterministic, seed=27844]>) + (0.2 * <Conn[dnorm=True]>))
In [20]:
g = GPCCA(combined_kernel)
print(g)
g.compute_schur(n_components=20)
#Above, we plotted the top 20 eigenvalues of the matrix T to see whether there is an apparent eigengap. 
g.plot_spectrum()
WARNING: Computing transition matrix using the default parameters
GPCCA[n=1857, kernel=((0.8 * <Velo[softmax_scale=8.5, mode=deterministic, seed=27844]>) + (0.2 * <Conn[dnorm=True]>))]
In [21]:
g.plot_schur(use=3, basis='xtsne_cell_embeddings')
In [22]:
#Infer terminal states
#The next step in GPCCA is to find a linear combination of these vectors such that the Markov chain defined on the subset of states has large self-transition probability
g.compute_macrostates(n_states=3, cluster_key='seurat_clusters')
g.plot_macrostates(basis='xtsne_cell_embeddings')
g.plot_macrostates(same_plot=False,basis='xtsne_cell_embeddings')
g.plot_macrostates(discrete=True,basis='xtsne_cell_embeddings')
INFO: Using pre-computed schur decomposition
In [23]:
# manually restrict the macrostates by passing a list of macrostate-names that you know are terminal in your data.
g.set_terminal_states_from_macrostates(['2','1'])
cr.pl.terminal_states(adata,basis="xtsne_cell_embeddings", dpi=150)
In [24]:
#Estimate fate probabilities
g.compute_absorption_probabilities()
g.absorption_probabilities
g.plot_absorption_probabilities(basis="xtsne_cell_embeddings")
g.plot_absorption_probabilities(same_plot=False,basis="xtsne_cell_embeddings", lineages=['2'], show_dp=False)
g.absorption_probabilities
Out[24]:
21
0.1556430.844357
0.0938480.906152
0.1382120.861788
0.1660120.833988
0.1256420.874358
0.1518610.848139
0.2537750.746225
0.1100070.889993
0.1064170.893583
0.0402010.959799
......
0.0543230.945677
0.0766130.923387
0.1520830.847917
0.1754140.824586
0.1549920.845008
0.0615160.938484
0.2172130.782787
0.2430620.756938
0.0751300.924870
0.0795380.920462

1857 cells x 2 lineages

In [ ]:
#save
adata.obs.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_somite_cellrank_obs.txt')
adata.var.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_somite_cellrank_var.txt')
adata.write('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_somite_cellrank.hdf5')
In [ ]:
 

limb

In [25]:
adata=sc.read_loom("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_limb.loom")
ldata = scv.read("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/velocity_9samples.loom", cache=True)

#remove W chr genes
wolp=adata.var_names.intersection(wgenes['ensembl_gene_id'])
print(wolp)
w_gene_indicator = np.in1d(adata.var_names, wgenes['ensembl_gene_id'])
adata = adata[:, ~w_gene_indicator]

adata_loom = ldata[:, adata.var_names]
adata = scv.utils.merge(adata, adata_loom); del ldata

adata.obs["seurat_clusters"] = adata.obs["seurat_clusters"].astype('category')
scv.pl.proportions(adata, groupby="seurat_clusters")
scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters')

print(adata.obs['seurat_clusters'].cat.categories)
recolor=["#FFE78A","#FFB817","#662D0B","#FFD01F","#FFA309","#C49606","#88540B"]
adata.uns['seurat_clusters_colors']=recolor

scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters') 
Index(['ENSGALG00000047434'], dtype='object')
Renamed 'xtsne_cell_embeddings' to convention 'X_xtsne_cell_embeddings' (adata.obsm).
Index(['1', '2', '3', '4', '5', '6', '7'], dtype='object')
In [26]:
## run scVelo
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
sc.tl.pca(adata)
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)

scv.tl.recover_dynamics(adata)

scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)
Filtered out 2490 genes that are detected 20 counts (shared).
WARNING: Did not normalize X as it looks processed already. To enforce normalization, set `enforce=True`.
Normalized count data: spliced, unspliced.
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
WARNING: Did not modify X as it looks preprocessed already.
computing moments based on connectivities
    finished (0:00:00) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
recovering dynamics
    finished (0:01:52) --> added 
    'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
    finished (0:00:01) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph
    finished (0:00:04) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)
In [27]:
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='seurat_clusters',legend_fontsize=8, smooth=.8, min_mass=3, 
                                 title='', size=20, figsize=(4,4), linewidth=0.4, 
                                 dpi=120,save="figure1_k")

#error: https://github.com/theislab/scvelo/issues/279
#plt.savefig('/Users/menghan/Documents/projects/skeletonConvergence/manuscript/figures/figure1_formal/figure1_K-0.pdf',dpi=300)
computing velocity embedding
    finished (0:00:01) --> added
    'velocity_xtsne_cell_embeddings', embedded velocity vectors (adata.obsm)
saving figure to file ./figures/scvelo_figure1_k.pdf
In [28]:
#check if SOX9 in the variable genes
print('ENSGALG00000004386' in adata.var_names)

sox9=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_limb_sox9Exp.txt',sep='\t')
adata.obs['SOX9_RNA']=sox9['x'].values

#if not in adata.var_names, then cannot plot
#use SOX9 expression as color
#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='SOX9_RNA', color_map=newcmp,
                                 legend_fontsize=8, smooth=.8, min_mass=3, 
                                 title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_k_sox9")
False
saving figure to file ./figures/scvelo_figure1_k_sox9.pdf
In [29]:
sk_module=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_limb_skModule.txt',sep='\t')
adata.obs['skeletal_Module']=sk_module['skeletal_Features1'].values

#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='skeletal_Module', color_map=newcmp,
                                 legend_fontsize=8, smooth=.8, min_mass=3, 
                                 title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_k_skModule")
saving figure to file ./figures/scvelo_figure1_k_skModule.pdf
In [30]:
#Combining two kernels
#RNA velocity vectors can sometimes be very noisy - to regularize them, let’s combine the VelocityKernel with a ConnectivityKernel, computed on the basis of transcriptomic similarity
vk = VelocityKernel(adata)
vk.compute_transition_matrix()
print(vk)
from cellrank.tl.kernels import ConnectivityKernel
ck = ConnectivityKernel(adata).compute_transition_matrix()
combined_kernel = 0.8 * vk + 0.2 * ck
print(combined_kernel)

<Velo[softmax_scale=10.96, mode=deterministic, seed=13856]>
((0.8 * <Velo[softmax_scale=10.96, mode=deterministic, seed=13856]>) + (0.2 * <Conn[dnorm=True]>))
In [31]:
g = GPCCA(combined_kernel)
print(g)
g.compute_schur(n_components=20)
#Above, we plotted the top 20 eigenvalues of the matrix T to see whether there is an apparent eigengap. 
g.plot_spectrum()
WARNING: Computing transition matrix using the default parameters
GPCCA[n=7611, kernel=((0.8 * <Velo[softmax_scale=10.96, mode=deterministic, seed=13856]>) + (0.2 * <Conn[dnorm=True]>))]
WARNING: Using `20` components would split a block of complex conjugates. Increasing `n_components` to `21`
In [32]:
g.plot_schur(use=3, basis='xtsne_cell_embeddings')
In [33]:
#Infer terminal states
#The next step in GPCCA is to find a linear combination of these vectors such that the Markov chain defined on the subset of states has large self-transition probability
g.compute_macrostates(n_states=3, cluster_key='seurat_clusters')
g.plot_macrostates(basis='xtsne_cell_embeddings')
g.plot_macrostates(same_plot=False,basis='xtsne_cell_embeddings')
g.plot_macrostates(discrete=True,basis='xtsne_cell_embeddings')
INFO: Using pre-computed schur decomposition
In [34]:
# manually restrict the macrostates by passing a list of macrostate-names that you know are terminal in your data.
g.set_terminal_states_from_macrostates(['5','6','1'])
#g.set_terminal_states(['5','3','2'])
cr.pl.terminal_states(adata,basis="xtsne_cell_embeddings", dpi=150)
In [35]:
#Estimate fate probabilities
g.compute_absorption_probabilities()
g.absorption_probabilities
g.plot_absorption_probabilities(basis="xtsne_cell_embeddings")
g.plot_absorption_probabilities(same_plot=False,basis="xtsne_cell_embeddings", lineages=['5'], show_dp=False)
g.absorption_probabilities
Out[35]:
561
0.9671580.0325330.000309
0.9969150.0029920.000093
0.9451080.0545990.000294
0.9832220.0151130.001665
0.9811660.0184660.000368
0.9723400.0273990.000261
0.9881440.0112440.000613
0.9467200.0530940.000186
0.9700280.0162060.013766
0.9961030.0037380.000159
.........
0.9996680.0003280.000004
0.9852680.0136780.001054
0.9166710.0830510.000277
0.9773120.0216920.000996
0.9999470.0000530.000001
0.9963810.0035670.000052
0.9864890.0133120.000199
0.9183240.0815510.000125
0.8947950.1049310.000274
0.9918720.0077320.000396

7611 cells x 3 lineages

In [ ]:
#save
adata.obs.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_limb_cellrank_obs.txt')
adata.var.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_limb_cellrank_var.txt')
adata.write('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_limb_cellrank.hdf5')
In [ ]: